
#---- Main analysis ------#
# This file conducts the second descriptive and multivariate analysis (negative appeals against
# out-groups) as well as the appendix as shown in
# "Supply-side dynamics of group appeals: How dominance affects 
# parties' choice between symbolic and policy-based appeals" (2025)

# ---- 0. Preparations: Data & Packages-----

# Load analysis dataset
# (its columns are addressed further below both by name and by position,
# e.g. columns 11:35 and column 86, so the column layout of this file matters)
groupappeals_sentiments_fullset_v1 <- readRDS("Data/groupappeals_sentiments_fullset_v1.rds")

# Load dictionary
# (presumably provides the object `dict_groups`, which the bubble-plot
# preparation indexes by group column name -- TODO confirm)
load("Data/dict_groups.RData")

# Load dominance data
# (presumably provides the object `dominance_since1990`, which is merged by
# `partei` and `land` below -- TODO confirm)
load("Data/dominance_since1990.RData") 

# load packages needed
library(tidyverse)
library(ggpubr)
library(openxlsx)
library(gridExtra)


#
#  ----- 1. Dataset preparations for the regression analyses ----
#
# Recode the analysis dataset in a single pass:
#  * categorical predictors become factors,
#  * electoral proximity is rescaled from days to (31-day) months,
#  * every dominance / PM duration gets a companion "<name>_year" column
#    (days / 365),
#  * MP_bin flags parties that have ever been the PM party (MP_total != 0).
# NOTE(review): the original script repeated as.factor(partei) under the
# heading "specify reference category"; that call was a no-op and no
# relevel() is applied anywhere here, so the reference category of `partei`
# is simply its first factor level.
groupappeals_sentiments_fullset_v1 <- groupappeals_sentiments_fullset_v1 %>%
  mutate(
    across(c(subst_policy, dominance_bin, partei, gov, MP_atm), as.factor),
    across(c(proximity_nextelection, proximity_btw), ~ .x / 31),
    across(
      c(
        dominance, dominance_total, MP, MP_total,
        dominance_before2000, dominance_00to10, dominance_after2010,
        dominance_wgt, MP_wgt
      ),
      ~ .x / 365,
      .names = "{.col}_year"
    ),
    MP_bin = ifelse(MP_total == 0, 0, 1)
  )

# delete doubled col
# NOTE(review): the column is removed by position (86); confirm that this
# index still points at the duplicated column if the input file changes.
groupappeals_sentiments_fullset_v1[86] <- NULL


# 
# -------- 2. Descriptive Analysis  ------
# 


# ----- 2.1 Bubbles plot (Figure 3) ------

# Work on a copy of the analysis dataset so that the regression data stay
# untouched.
groups <- groupappeals_sentiments_fullset_v1

# Columns 11:35 are treated as per-group mention counts. They are reduced to
# 0/1 indicators (because the number of mentions per tweet is not relevant).
# NOTE(review): the columns are addressed by position; confirm that 11:35
# still covers exactly the group columns (later referred to by name as
# students:landlords).
for (i in 11:35) {
  
  groups[i] <- ifelse(groups[i] > 0,1,0)
}


# Strip the list formatting from `targets` so that it can be compared with
# the dictionary entries as a plain string.
groups <- groups %>%
  mutate(targets = str_replace_all(targets, "\\[|\\]|'", ""))  # Remove '[' ']' and ''

# For rows that are flagged for more than one group, keep a group's flag only
# if the cleaned `targets` string is an element of that group's dictionary
# entry (dict_groups[[col]]); otherwise reset the flag to 0.
# Note that rowSums() is evaluated on the current state of `groups` in every
# iteration, i.e. it already reflects flags that were reset for earlier
# columns, so the outcome depends on the column order.
# NOTE(review): `targets %in% dict_groups[[col]]` compares the whole cleaned
# string; rows whose `targets` holds several targets will presumably never
# match -- confirm that this is intended.
for (col in names(groups)[11:35]) {
  groups <- groups %>%
    mutate(!!col := if_else(
      rowSums(select(groups, 11:35)) > 1 & !(targets %in% dict_groups[[col]]),
      0,  # Set to 0 if the condition is met
      !!sym(col)  # Otherwise, retain the original value
    ))
}

# Aggregate the 0/1 group flags to one row per account, party and sentiment
# (sentiment_validated_neg_num: 0 = not negative, 1 = negative).
groups <- groups %>% 
  group_by(screen_name, partei, sentiment_validated_neg_num) %>%
  summarize(across(students:landlords, sum), .groups = "drop_last")

# Further adjustments: drop the `academics` group and the account identifier,
# and store the party as plain text.
groups$academics <- NULL
groups$screen_name <- NULL
groups$partei <- as.character(groups$partei)

# Party-level shares of group references.
#  1. Sum the references per party and sentiment. The summing function is
#     passed as a lambda because forwarding `na.rm = TRUE` through the `...`
#     of across() is deprecated since dplyr 1.1.0.
#  2. `total` is the number of references of a party over all groups and both
#     sentiment categories (the data are grouped by party at that point).
#  3. Each cell is divided by `total` and rounded to three digits, so the
#     shares of one party sum to (approximately) one across both sentiment
#     rows.
# The result stays grouped by `partei`, exactly as before.
groups_rel <- groups %>%
  group_by(partei, sentiment_validated_neg_num) %>%
  summarize(
    across(students:landlords, ~ sum(.x, na.rm = TRUE)),
    .groups = "drop_last"
  ) %>%
  mutate(total = sum(across(students:landlords))) %>%
  group_by(partei, sentiment_validated_neg_num) %>%
  summarize(
    round(across(students:landlords, ~ . / total), digits = 3),
    .groups = "drop_last"
  )


# Helper function for the plot: turns a numeric vector into an on/off code.
# Entries equal to 0 (or -1) become 100, strictly positive entries become 0;
# all other entries (other negative values, NA) are returned unchanged.
alpha_values <- function(vec) {
  # Both masks are taken from the untouched input so that the two
  # replacements below cannot interfere with each other.
  known <- !is.na(vec)
  to_hundred <- known & (vec == 0 | vec == -1)
  to_zero <- known & vec > 0

  vec[to_zero] <- 0
  vec[to_hundred] <- 100
  vec
}


# Names of the 24 group columns (positions 3:26), in descending alphabetical
# order
column_names <- names(groups_rel)[3:26]
sorted_column_names <- rev(sort(column_names))

# Rebuild the data with party and sentiment first, followed by the group
# columns in that order (cbind() returns a plain data.frame)
groups_rel <- cbind(groups_rel[1:2], groups_rel[sorted_column_names])

# Shares of exactly zero (no appeal to that group) are set to NA
groups_rel[3:26][groups_rel[3:26] == 0] <- NA

# exclude free voters
groups_rel <- filter(groups_rel, partei != "FW")


# Build the bubble layer of Figure 3.
#
# The share columns (positions 3:26 of `groups_rel`) are reshaped to long
# format so that all bubbles can be drawn by a single geom_point() layer
# instead of one hand-written layer per group and sentiment:
#  * y is the position of the group among the share columns (1..24, i.e.
#    column index - 2),
#  * x is the position of the party among the parties present in
#    `groups_rel`; non-negative appeals (sentiment 0) start at x = 1,
#    negative appeals (sentiment 1) are shifted by 9 and start at x = 10,
#  * bubble size is the share, fill is the party.
# Rows with an NA share (no appeal to that group) are dropped by geom_point().
# Positions are derived from the party itself rather than from the row order,
# so a party lacking one of the two sentiment rows no longer breaks the plot.
party_order <- unique(groups_rel$partei)
group_names <- colnames(groups_rel)[3:26]

bubble_data <- groups_rel %>%
  ungroup() %>%
  filter(sentiment_validated_neg_num %in% c(0, 1)) %>%
  pivot_longer(
    cols = all_of(group_names),
    names_to = "group",
    values_to = "share"
  ) %>%
  mutate(
    x = match(partei, party_order) +
      if_else(sentiment_validated_neg_num == 1, 9, 0),
    y = match(group, group_names)
  )

# create ggplot object holding all bubbles
results <- ggplot() +
  geom_point(
    data = bubble_data,
    aes(x = x, y = y, fill = partei, size = share),
    color = "black", pch = 21
  )


# Party labels shown above the two panels: x = 1:6 for the first block of
# bubbles, x = 10:15 for the second block.
# NOTE(review): this assumes exactly six parties in `groups_rel`.
party_labels <- data.frame(
  x = c(1:6, 10:15),
  party = rep(unique(groups_rel$partei), 2)
)

# Define custom party colors
party_colors <- c(
  "CDU" = "black",
  "SPD" = "darkred",
  "Grüne" = "darkgreen",
  "FDP" = "yellow",
  "AfD" = "blue",
  "Linke" = "pink"
)

# Create the final plot with party-specific colors.
# `results` already contains the bubbles of every group and sentiment; the
# extra layer that used to be added here re-drew the negative bubbles of
# column 23 a second time and has been removed.
bubble_plot <- results +
  scale_fill_manual(
    name = "Party",
    values = party_colors
  ) +
  theme_test() +
  theme(
    axis.title = element_blank(),
    text = element_text(family = "Arial", size = 14),
    legend.position = "none"
  )

# Add party labels above the bubbles, name the rows after the groups and mark
# the two blocks as "Positive" / "Negative" on the x-axis.
# NOTE(review): `size` in element_line() is deprecated in favour of
# `linewidth` from ggplot2 3.4.0 on; it is kept for compatibility with older
# installations.
bubble_plot +
  geom_text(
    data = party_labels,
    aes(x = x, y = 25, label = party),
    vjust = -0.5, hjust = 0.5, size = 4,
    color = "black", fontface = "bold"
  ) +
  scale_y_continuous(
    breaks = c(1:24),
    labels = colnames(groups_rel)[3:26]
  ) +
  scale_x_continuous(
    breaks = c(4, 12),
    labels = c("Positive", "Negative"),
    name = ""
  ) +
  theme(
    panel.grid.major.y = element_line(color = "gray", size = 0.3),
    panel.grid.minor.y = element_blank()
  )



# 
# ---- 2.2 Negative appeals by party (branch) (Figure 4)-----
#

# Plot dataset: one row per party branch (party x state) with
#   sum_appeal - number of group appeals,
#   sum_subst  - number of negative appeals,
#   prop_subst - share of negative appeals.
# The "subst" suffix is kept so that the column names match the dataset of
# the policy-based analysis.
neg_perparty <- groupappeals_sentiments_fullset_v1 %>%
  group_by(partei, land) %>%
  summarise(
    sum_appeal = n(),
    sum_subst = sum(sentiment_validated_neg_num),
    prop_subst = sum(sentiment_validated_neg_num) / sum_appeal,
    .groups = "drop"
  ) %>%
  mutate(landesverband = str_c(partei, "_", land)) %>%
  dplyr::select(
    partei, land, landesverband, sum_appeal, sum_subst, prop_subst
  )

# attach the dominance data to the shares of negative appeals
neg_perparty <- merge(
  neg_perparty,
  dominance_since1990,
  by = c("partei", "land")
)

# convert total dominance from days to years
neg_perparty$dominance_year <- neg_perparty$dominance_total / 365


# combine plots

# Function to extract the legend from a ggplot so that it can be drawn as a
# separate panel (e.g. by grid.arrange()).
#
# my_ggplot: a ggplot object that draws a legend.
# Returns the first non-empty legend grob of the plot. A grob counts as a
# legend if it is named "guide-box" or sits in a layout slot whose name
# starts with "guide-box" (newer ggplot2 releases may register one slot per
# legend position -- NOTE(review): confirm against the installed version).
# Stops with an informative error if the plot has no legend, instead of
# failing with a subscript error.
get_legend <- function(my_ggplot) {
  g <- ggplotGrob(my_ggplot)

  grob_names <- vapply(
    g$grobs,
    function(x) as.character(x$name)[1],
    character(1)
  )
  is_guide_box <- grob_names %in% "guide-box" |
    startsWith(g$layout$name, "guide-box")
  is_empty <- vapply(g$grobs, inherits, logical(1), what = "zeroGrob")

  legend_idx <- which(is_guide_box & !is_empty)
  if (length(legend_idx) == 0) {
    stop("No legend found in the supplied plot.", call. = FALSE)
  }

  g$grobs[[legend_idx[1]]]
}

# Import data from script 02a
# (presumably provides `subst_perparty` -- TODO confirm)
load("Data/subst_perparty.RData")

# Scatter plot of a branch-level share (`prop_subst`) against dominance in
# years, with a linear fit and party-coloured points. Both panels of the
# figure share this specification and differ only in data and y-axis label.
dominance_scatter <- function(data, y_label) {
  ggplot(data, aes(x = dominance_year, y = prop_subst)) +
    geom_point(aes(color = partei), size = 2, alpha = 0.7) +
    geom_smooth(method = lm, se = TRUE, color = "black") +
    xlab("Dominance in years") +
    ylab(y_label) +
    theme_bw() +
    theme(
      axis.text = element_text(family = "Arial", size = 14),
      axis.title = element_text(family = "Arial", size = 16)
    ) +
    scale_color_manual(values = c(
      "CDU" = "black",
      "SPD" = "darkred",
      "Grüne" = "darkgreen",
      "FDP" = "yellow",
      "AfD" = "blue",
      "Linke" = "pink"
    )) +
    labs(color = "Party")
}

# Left panel: policy-based appeals (drawn without a legend)
plot_substperparty <- dominance_scatter(
  subst_perparty,
  "Share of policy-based group appeals"
) +
  theme(legend.position = "none")

# Right panel: negative appeals; its legend is extracted first and then
# switched off so that it can be shown once for both panels
plot_negperparty <- dominance_scatter(
  neg_perparty,
  "Share of negative group appeals"
)
legend <- get_legend(plot_negperparty)
plot_negperparty <- plot_negperparty + theme(legend.position = "none")

# Both panels side by side, shared legend in a narrow third column
grid.arrange(
  plot_substperparty, plot_negperparty, legend,
  ncol = 3, widths = c(4, 4, 1)
)



# ---- 3. Negative sentiment tweets - Main regression (Table 2) -----
# Logistic regressions of the negative-sentiment indicator on dominance.
# All models include party dummies. Note that the numbering skips sentm2.

# Model 1: binary dominance indicator
sentm1 <- glm(sentiment_validated_neg_num ~ dominance_bin + 
                partei,
              data = groupappeals_sentiments_fullset_v1, 
              family= "binomial")

# Model 2 of the table: dominance in years
sentm3 <- glm(sentiment_validated_neg_num ~ dominance_year + 
                partei,
              data = groupappeals_sentiments_fullset_v1, 
              family= "binomial")

# Model 3 of the table: dominance in years interacted with years as PM party,
# controlling for government status
sentm4 <- glm(sentiment_validated_neg_num~ dominance_year*MP_year + 
                gov+
                partei,
              data = groupappeals_sentiments_fullset_v1, 
              family= "binomial")


# Predicted values for the dominance-in-years model (printed, not stored).
# NOTE(review): plot_model() is not provided by any package attached at the
# top of this script (presumably sjPlot) -- confirm that it is attached
# before this line runs.
plot_model(sentm3, type = "pred")


# combine models in one table (Export as html)

# Define models
models <- list(sentm1, sentm3, sentm4)

# Compute odds ratios, confidence intervals, and p-values
# NOTE(review): extract_or(), compute_ci() and extract_p_values() are not
# defined in the visible part of this script; they are presumably created by
# an earlier script -- confirm that they exist in the session.
or_list <- lapply(models, extract_or)
ci_list <- lapply(models, compute_ci)
p_values_list <- lapply(models, extract_p_values)

# Generate the regression table using stargazer
# (party dummies are omitted from the output and flagged in an extra line).
# NOTE(review): the stargazer package is not attached at the top of this
# script either -- confirm.
stargazer(models, 
          type = "html",
          coef = or_list,  # Display odds ratios instead of log-odds
          ci = TRUE,  
          ci.custom = ci_list,  
          p = p_values_list,  
          omit = "partei",  
          digits = 3,
          title = "Regression Table (Sentiment Analysis) with Odds Ratios and CIs",
          dep.var.labels.include = FALSE,
          add.lines = list(
            c("Party dummies", "YES", "YES", "YES")
          ),
          out = "Figures/regtable_sentiment_mar2025.html")





# ---- 4. Calculate marginal effects of the interaction term (Figure 7) ----
# Average marginal effect of dominance (in years) from the interaction model,
# evaluated at 1 to 30 years as PM party.
# NOTE(review): margins() is not provided by any package attached at the top
# of this script (presumably the margins package) -- confirm.
margins_interaction <- margins(sentm4,
                               at = list(MP_year = c(1:30)),
                               variables = c("dominance_year")
)
sum_interaction <- summary(margins_interaction)

# Running index used as x position.
# NOTE(review): this assumes that the summary rows are ordered by MP_year
# (1, 2, ..., 30) -- confirm.
sum_interaction$y <- seq_len(nrow(sum_interaction))

# Plot marginal effects of dominance by PM in years.
# The axis breaks 0, 5, ..., 30 and the title "PM in years" used to be set via
# scale_y_continuous(), i.e. on the wrong axis, and that scale was replaced
# again by the later ylim() call, so the breaks never took effect. They are
# now attached to the x-axis; the axis limits are unchanged (x: 0 to 30,
# y: -0.005 to 0.005). The constant xmin/xmax aesthetics were dropped because
# the x limits are set on the scale.
plot_amesbyMPyears <- ggplot(data = sum_interaction,
                             aes(y = AME, x = y, ymin = lower, ymax = upper)) +
  geom_point(color = "black") +
  geom_errorbar() +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed", alpha = 0.5) +
  scale_x_continuous(name = "PM in years",
                     breaks = c(0, 5, 10, 15, 20, 25, 30),
                     limits = c(0, 30)) +
  scale_y_continuous(name = "Marginal Effect",
                     limits = c(-0.005, 0.005)) +
  theme_bw() +
  theme(
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.y =  element_text(size = 12),
    axis.text.x =  element_text(size = 12)
  )




# ---- 5. Robustness checks for the appendix -----


#
# ----- 5.1 Negative appeals aggregated to national parties (Figure A3) -----
#

# Party-level summary of the branch shares of negative appeals: number of
# branches, mean and standard deviation, with the party stored as a factor
byparty <- neg_perparty %>%
  group_by(partei) %>%
  summarise(
    count = n(),
    mean = mean(prop_subst, na.rm = TRUE),
    sd = sd(prop_subst, na.rm = TRUE)
  ) %>%
  mutate(partei = as.factor(partei))

# Boxplot of the branch-level shares per party; the y-axis runs from 0 to 0.7
# and is labelled in percent
ggboxplot(
  neg_perparty,
  x = "partei",
  y = "prop_subst",
  xlab = "Party",
  ylab = "Proportion of negative group appeals"
) +
  scale_y_continuous(
    name = "Share of negative group appeals",
    breaks = c(0, 0.2, 0.4, 0.6),
    labels = c("0", "20", "40", "60"),
    limits = c(0, 0.7)
  ) +
  theme_classic() +
  theme(
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.y =  element_text(size = 12),
    axis.text.x =  element_text(size = 12)
  )




# ----- 5.2 Different types of dominance (Figure A5) -----

# Classify every party branch by its government history: never governed,
# used to govern, permanently dominant, newly governing. Branches that match
# none of the rules below receive NA and are excluded from the plot.
# NOTE(review): `before_2010 >= 0` in the "usedtogovern" rule presumably holds
# for every row -- confirm that it is intended.
dominance_since1990 <- mutate(
  dominance_since1990,
  type = case_when(
    dominance_total == 0 ~ "never_governed",
    before_2010 >= 0 & after_2010 == 0 & before_2000 > 0 ~ "usedtogovern",
    before_2010 > 0 & after_2010 > 0 & before_2000 > 0 & MP_total > 0 ~
      "permanently_dominant",
    # alternative rule, not in use:
    # dominance_total > 8000 ~ "permanently_dominant",
    before_2010 == 0 & before_2000 == 0 & after_2010 > 0 ~ "newly_governing"
  )
)

# Attach the dominance data (now including `type`) to the shares of negative
# appeals
neg_perparty <- merge(
  neg_perparty,
  dominance_since1990,
  by = c("partei", "land")
)

# Keep only the branches that received a type
filtered_data <- neg_perparty[!is.na(neg_perparty$type), ]

# Median share of negative appeals per type, in ascending order
median_data <- arrange(
  summarise(
    group_by(neg_perparty, type),
    median_prop_subst = median(prop_subst, na.rm = TRUE)
  ),
  median_prop_subst
)

# Order the types on the x-axis by their median share
filtered_data$type <- factor(filtered_data$type, levels = median_data$type)

# Boxplot per type; black triangles mark the group means
ggplot(filtered_data, aes(x = type, y = prop_subst, fill = type)) +
  geom_boxplot() +
  stat_summary(
    fun = mean, geom = "point", shape = 17, size = 3, color = "black",
    position = position_dodge(0.75)
  ) +
  labs(
    title = "Share of  negative group appeals per government type",
    x = "Party Type",
    y = "Share of negative group appeals"
  ) +
  theme_minimal() +
  theme(legend.position = "none")




#
# ---- 5.3 Regression models without the AfD (Figure A7) -----
#

# create subset excluding all AfD branches
withoutafd <- groupappeals_sentiments_fullset_v1 %>% filter(partei != "AfD")

# re-estimate the logistic regression models on the reduced sample
# (party dummies only; the state dummies are commented out)
sentm1 <- glm(sentiment_validated_neg_num ~ dominance_bin +
                partei,
              #as.factor(land),
              data = withoutafd,
              family = "binomial")


sentm2 <- glm(sentiment_validated_neg_num ~ dominance_year +
                partei,
              #as.factor(land),
              data = withoutafd,
              family = "binomial")


sentm3 <- glm(sentiment_validated_neg_num ~ dominance_year * MP_year +
                gov +
                partei,
              #as.factor(land),
              data = withoutafd,
              family = "binomial")



# Combine models in one table (export as html)

# Define models
models <- list(sentm1, sentm2, sentm3)

# Compute odds ratios, confidence intervals, and p-values
or_list <- lapply(models, extract_or)
ci_list <- lapply(models, compute_ci)
p_values_list <- lapply(models, extract_p_values)

# Derive the "dummies included" flags from the fitted models instead of
# hard-coding them, so the table cannot claim controls that are not in the
# formula (the state dummies are currently commented out above).
fe_flag <- function(term) {
  vapply(models, function(m) {
    included <- any(grepl(term, attr(terms(m), "term.labels"), fixed = TRUE))
    if (included) "YES" else "NO"
  }, character(1))
}

# Generate the regression table using stargazer
stargazer(models,
          type = "html",
          coef = or_list,  # Display odds ratios instead of log-odds
          ci = TRUE,  # Enable confidence intervals
          ci.custom = ci_list,  # Provide exponentiated confidence intervals
          p = p_values_list,  # Ensure correct p-values are used
          omit = "land|partei",  # Omit specified variables
          digits = 3,
          title = "Regression Table (Sentiment Analysis Without AfD) with Odds Ratios and CIs",
          dep.var.labels.include = FALSE,
          add.lines = list(
            c("State dummies", fe_flag("land")),
            c("Party dummies", fe_flag("partei"))
          ),
          out = "Figures/regtable_sentiment_woafd_mar2025.html")


# ------ 5.4 Validation (Table A2, for validation data please contact author) ------

# set.seed(123)
# sampled_tweets <- groupappeals_sentiments_fullset_v1 %>%
#   sample_n(500)
# 
# write.xlsx(sampled_tweets, file = "Data/validation_translationEN.xlsx")



# read coded data back in
# val <- read.xlsx("")
# 
# val$label <- ifelse(val$sentiment_validated_neg_num == 0, "posorneutr", val$label)
# val$label <- ifelse(val$sentiment_validated_neg_num == 1, "negative", val$label)
# 
# val$sentiment_val <- ifelse(val$sentiment_val  == "positive", "posorneutr", val$sentiment_val)
# val$sentiment_val <- ifelse(val$sentiment_val  == "neutral", "posorneutr", val$sentiment_val)
# 
# handcodedvar <- groupappeals_sentiments_fullset_v1 %>%
#   select(docid2, translated_targets, sentiment_validated_neg_num)
# 
# val <- merge(val, handcodedvar,
#              by = c("docid2", "translated_targets"),
#              all.x = T,
#              all.y = F)
# 
# 
# val <- val %>%
#   select(label, sentiment_val)
# 
# 
# 
# # Calculate the accuracy
# accuracy <- sum(val$label == val$sentiment_val) / nrow(val)
# 
# # Print the accuracy
# cat("Accuracy:", round(accuracy * 100, 2), "%\n")




# Create a confusion matrix
# confusion_matrix <- table(Predicted = val$label, Actual = val$sentiment_val)
# 
# # Extract confusion matrix values
# TP <- confusion_matrix["posorneutr", "posorneutr"]  # True Positives
# TN <- confusion_matrix["negative", "negative"]  # True Negatives
# FP <- confusion_matrix["posorneutr", "negative"]  # False Positives
# FN <- confusion_matrix["negative", "posorneutr"]  # False Negatives
# 
# # Calculate accuracy
# accuracy <- (TP + TN) / sum(confusion_matrix)
# 
# # Calculate precision
# precision <- TP / (TP + FP)
# 
# # Calculate recall
# recall <- TP / (TP + FN)
# 
# # Calculate F1-score
# f1_score <- 2 * (precision * recall) / (precision + recall)
# 
# # Print results
# cat("Accuracy:", round(accuracy * 100, 2), "%\n")
# cat("Precision:", round(precision * 100, 2), "%\n")
# cat("Recall:", round(recall * 100, 2), "%\n")
# cat("F1-Score:", round(f1_score * 100, 2), "%\n")


#
# ---- 5.5 Regression models with alternative dominance specifications (Table A4) ----
#
# Logistic regressions using the weighted dominance measures
# (party dummies only; the state dummies are commented out)
sentm3_alt <- glm(sentiment_validated_neg_num ~ dominance_wgt_year +
                    partei,
                  #as.factor(land),
                  data = groupappeals_sentiments_fullset_v1,
                  family = "binomial")


sentm4_alt <- glm(sentiment_validated_neg_num ~ dominance_wgt_year * MP_wgt_year +
                    gov +
                    partei,
                  #as.factor(land),
                  data = groupappeals_sentiments_fullset_v1,
                  family = "binomial")

# Combine models in one table and export as html

# Define models
models <- list(sentm3_alt, sentm4_alt)

# Compute odds ratios, confidence intervals, and p-values
or_list <- lapply(models, extract_or)
ci_list <- lapply(models, compute_ci)
p_values_list <- lapply(models, extract_p_values)

# Derive the "dummies included" flags from the fitted models instead of
# hard-coding them, so the table cannot claim controls that are not in the
# formula (the state dummies are currently commented out above).
fe_flag <- function(term) {
  vapply(models, function(m) {
    included <- any(grepl(term, attr(terms(m), "term.labels"), fixed = TRUE))
    if (included) "YES" else "NO"
  }, character(1))
}

# Generate the regression table using stargazer
stargazer(models,
          type = "html",
          coef = or_list,  # Display odds ratios instead of log-odds
          ci = TRUE,  # Enable confidence intervals
          ci.custom = ci_list,  # Provide exponentiated confidence intervals
          p = p_values_list,  # Ensure correct p-values are used
          omit = "land|partei",  # Omit specified variables
          digits = 3,
          title = "Regression Table (Sentiment Analysis - Weighted) with Odds Ratios and CIs",
          model.names = FALSE,  # No default model names
          column.labels = c("Model 4", "Model 5"),  # Custom model names
          dep.var.labels.include = FALSE,
          add.lines = list(
            c("State dummies", fe_flag("land")),
            c("Party dummies", fe_flag("partei"))
          ),
          out = "Figures/regtable_annex_sent_weighted_mar2025.html")


#
# ---- 5.6 Additional model specifications (Table A5) ----
#

# Additional specifications that swap the set of dummies:
# sent_a1 / sent_a2 / sent_a4 use state dummies only, sent_a3 party dummies only.
sent_a1 <- glm(sentiment_validated_neg_num ~ dominance_bin +
                 #partei+
                 as.factor(land),
               data = groupappeals_sentiments_fullset_v1,
               family = "binomial")


sent_a2 <- glm(sentiment_validated_neg_num ~ dominance_year +
                 #partei+
                 as.factor(land),
               data = groupappeals_sentiments_fullset_v1,
               family = "binomial")


sent_a3 <- glm(sentiment_validated_neg_num ~ MP_year +
                 partei,
               #as.factor(land),
               data = groupappeals_sentiments_fullset_v1,
               family = "binomial")

sent_a4 <- glm(sentiment_validated_neg_num ~ MP_year +
                 #partei,
                 as.factor(land),
               data = groupappeals_sentiments_fullset_v1,
               family = "binomial")


# Define models
models <- list(sent_a1, sent_a2, sent_a3, sent_a4)

# Compute odds ratios, confidence intervals, and p-values
or_list <- lapply(models, extract_or)
ci_list <- lapply(models, compute_ci)
p_values_list <- lapply(models, extract_p_values)

# Derive the "dummies included" flags from the fitted models instead of
# hard-coding them: the previous hard-coded rows reported state dummies for
# sent_a3 and party dummies for sent_a4, neither of which is in the formula.
fe_flag <- function(term) {
  vapply(models, function(m) {
    included <- any(grepl(term, attr(terms(m), "term.labels"), fixed = TRUE))
    if (included) "YES" else "NO"
  }, character(1))
}

# Generate the regression table using stargazer
stargazer(models,
          type = "html",
          coef = or_list,  # Display odds ratios instead of log-odds
          ci = TRUE,  # Enable confidence intervals
          ci.custom = ci_list,  # Provide exponentiated confidence intervals
          p = p_values_list,  # Ensure correct p-values are used
          omit = "land|partei",  # Omit specified variables
          digits = 3,
          title = "Regression Table (Sentiment Extra Appendix) with Odds Ratios and CIs",
          dep.var.labels.include = FALSE,
          add.lines = list(
            c("State dummies", fe_flag("land")),
            c("Party dummies", fe_flag("partei"))
          ),
          out = "Figures/regtable_sent_extraappendix_mar2025.html")



#
# ------ 5.7 Tabulated data (Table A10) -----
#

# Cross-tabulate the dominance indicator (rows) against the
# negative-sentiment indicator (columns)
table_data <- table(
  groupappeals_sentiments_fullset_v1[["dominance_bin"]],
  groupappeals_sentiments_fullset_v1[["sentiment_validated_neg_num"]]
)

# Row-wise shares, expressed in percent (each row sums to 100)
percentages <- 100 * prop.table(table_data, margin = 1)

# Show the percentage table
percentages





























